package com.etl.jdbc.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regular-expression helpers for stripping HTML tag markup from text.
 *
 * <p>Only tag markup is removed. Character entities such as {@code &nbsp;} or
 * {@code &amp;} and the text between tags are left untouched. These helpers are
 * plain regex substitutions, not an HTML parser or sanitizer.
 *
 * User: Locker1995
 * Date: 2018-03-28
 * Time: 4:35 PM
 */
public class HTMLRegexpUtil {

    /**
     * Matches any run that starts with {@code <} and ends at the next {@code >}.
     * Compiled once because {@link Pattern} is immutable and safe to share.
     */
    private static final Pattern HTML_TAG_PATTERN = Pattern.compile("<([^>]*)>");

    /**
     * Removes every substring that starts with {@code <} and ends with the next
     * {@code >}, i.e. all opening, closing and self-closing tags.
     *
     * @param str the text to strip; may be {@code null}
     * @return {@code str} with all {@code <...>} runs removed, or {@code null}
     *         when {@code str} is {@code null}
     */
    public static String filterHtml(String str) {
        if (str == null) {
            return null;
        }
        return HTML_TAG_PATTERN.matcher(str).replaceAll("");
    }

    /**
     * Removes the opening (or self-closing) form of one specific tag, with or
     * without attributes: {@code <tag>}, {@code <tag/>}, {@code <tag />} and
     * {@code <tag attr="...">}.
     *
     * <p>Closing tags ({@code </tag>}) are not removed, and the tag name is
     * matched case-sensitively. Tags that merely start with the same letters
     * (for example {@code <abbr>} when {@code tag} is {@code "a"}) are kept.
     *
     * @param str the text to strip; may be {@code null}
     * @param tag the literal tag name to remove (not a regular expression)
     * @return {@code str} with the matching tags removed; {@code str} itself
     *         (possibly {@code null}) when {@code str} is {@code null} or
     *         {@code tag} is {@code null} or empty
     */
    public static String fiterHtmlTag(String str, String tag) {
        if (str == null || tag == null || tag.isEmpty()) {
            return str;
        }
        // Quote the tag so regex metacharacters in it are matched literally.
        // After the name allow either: nothing, a bare "/", or whitespace
        // followed by arbitrary attribute text up to the closing ">".
        String regxp = "<\\s*" + Pattern.quote(tag) + "(?:\\s[^>]*|/)?>";
        return Pattern.compile(regxp).matcher(str).replaceAll("");
    }

    /**
     * Small manual demo: strips all tags from a sample HTML fragment and
     * prints the result to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args){

        String str="<div>BPSC created a global survey in the team site </div>\n" +
                "<div><a href=\"http://i2hq/sites/isd/CORPBPSHKG/Lists/DD_Survey_EUT/overview.aspx\">http://i2hq/sites/isd/CORPBPSHKG/Lists/DD_Survey_EUT/overview.aspx</a>.<br></div>\n" +
                "<div>and the access should be given to OOCL domine users. However users STEPHANIE BALZER (MKT-M&amp;S-MAT/BSL)<br>cannot access into the survey, also we have explicitly assigned the access against his email ID, but still does not work. </div>\n" +
                "<div>&nbsp;</div>\n" +
                "<div>Please check and advise. Thanks/eve</div>";

        System.out.println(HTMLRegexpUtil.filterHtml(str));
    }

}
